################################
#### Manifestos Dataset ########
################################

## Purpose: This script analyzes patterns 
## in the Comparative Manifestos Project. We include
## this analysis in the SI, section K. 
## Data In:
## Comparative Manifestos Project data:
## data_files/MPDataset_MPDS2024a.xlsx
## Follow the instructions in the README for how
## to download this dataset. Save the downloaded
## dataset in the "data_files" folder.
## Data Out:
## figures/manifestos_index.pdf

##################################
### Libraries and Dependencies ###
##################################

## set working directory to 
## the Dataverse replication folder

library(openxlsx) ## version 4.2.6.1
library(tidyverse) ## version 2.0.0

## read the full Comparative Manifestos Project spreadsheet
manifestos <- read.xlsx("data_files/MPDataset_MPDS2024a.xlsx")

## keep only United States platforms (the documentation lists
## country "61" as the United States) written by the Democratic
## or Republican party. which() drops rows where the combined
## condition is NA, so no all-NA rows are carried along.
manifestos <- manifestos[
  which(manifestos$country == 61 &
          manifestos$partyname %in% c("Democratic Party",
                                      "Republican Party")),
]

## clean year: keep the first run of four digits found in `date`
## and discard everything that follows it
manifestos$year <- gsub(pattern = "([0-9]{4}).*",
                        replacement = "\\1",
                        x = manifestos$date)

## The CMP collects the party platforms
## before each presidential election. The authors
## then break down each platform into "quasi-sentences"
## and assign an issue area to each sentence.
## The dataset then reports the percent of quasi-sentences
## devoted to each issue area.
## .05 means 5% of quasi-sentences
## were assigned to that code in that year. 

## We create an index for economic-distributive issues
## and socio-cultural issues. We use the same issue
## items used by Gethin et al. (2022), although 
## we use a different weighting scheme to create the indices. 


## keep only the identifiers (partyname, year) and the issue-area
## columns used further down in this script
manifestos <- select(
  manifestos,
  all_of(c(
    "partyname",
    "year",
    ## domain 2 / 3: freedom, democracy, political system
    "per201", ## freedom and human rights
    "per202", ## democracy
    "per305", ## political authority
    ## domain 4: economy
    "per401", ## free market economy
    "per402", ## incentives / supply-side economics (original note:
              ## "not included, but lets add to Republican economic")
    "per403", ## market regulation
    "per404", ## economic planning
    "per405", ## corporatism
    "per406", ## protectionism: positive
    "per407", ## protectionism: negative
    "per409", ## Keynesian demand management: positive
    "per410", ## economic growth (productivity): positive
    "per412", ## controlled economy
    "per413", ## nationalisation
    "per414", ## economic orthodoxy: positive
    "per415", ## Marxist analysis
    "per416", ## anti-growth economy
    ## domain 5: welfare and quality of life
    "per501", ## environmental protection
    "per502", ## culture: positive
    "per503", ## equality: positive / social justice
    "per504", ## welfare state expansion
    "per505", ## welfare state limitation
    "per506", ## education expansion
    "per507", ## education limitation
    ## domain 6: fabric of society
    "per601", ## national way of life: positive
    "per602", ## national way of life: negative
    "per603", ## traditional morality: positive
    "per604", ## traditional morality: negative
    "per605", ## law and order: positive
    "per607", ## multiculturalism: positive
    "per608", ## multiculturalism: negative
    ## domain 7: social groups
    "per701", ## labour groups: positive
    "per702", ## labour groups: negative
    "per705", ## underprivileged minority groups: positive
    "per706"  ## non-economic demographic groups
  ))
)

## create long version: one row per platform (partyname x year)
## and issue code. Every column other than partyname and year is
## stacked, with its column name stored in `characteristic` and
## its value in `share`.
## pivot_longer() is used in place of the superseded gather().
manifestos_long <- manifestos %>%
  pivot_longer(cols = -c(partyname, year),
               names_to = "characteristic",
               values_to = "share")

## share indicates the share of quasi-sentences in the respective
## category, calculated as a fraction of the overall number of 
## allocated codes (quasi-sentences assigned a code) 
## per manifesto document

## clean variables
## year: append "-01-01" to the year string and parse it as a Date,
## so each platform is dated 1 January of its year
manifestos_long$year <- as.Date(paste0(manifestos_long$year, "-01-01"))

## partyname: factor with "Republican Party" as the first level
manifestos_long$partyname <- factor(
  manifestos_long$partyname,
  levels = c("Republican Party", "Democratic Party")
)

## attach a readable label to each issue-area code.
## The code -> label lookup is written as a named character vector
## and spliced into recode() with `!!!`; codes that are not in the
## lookup keep their original value.
manifestos_long$characteristic_val <- dplyr::recode(
  manifestos_long$characteristic,
  !!!c(
    ## freedom, democracy, political system
    per201 = "Freedom and Human Rights",
    per202 = "Democracy",
    per305 = "Political Authority",
    ## economy
    per401 = "Free Market Economy",
    per402 = "Incentives/Supply Side Economics",
    per403 = "Market Regulation",
    per404 = "Economic Planning",
    per405 = "Corporatism",
    per406 = "Protectionism:Positive",
    per407 = "Protectionism:Negative",
    per408 = "Economic Goals",
    per409 = "Keynsian Demand Side",
    per410 = "Economic Growth:Positive",
    per411 = "Technology/Infrastructure:Positive",
    per412 = "Controlled Economy",
    per413 = "Nationalization of Economy",
    per414 = "Balanced Budget",
    per415 = "Marxism",
    per416 = "Economic Growth:Negative",
    ## welfare and quality of life
    per501 = "Environmentalism",
    per502 = "Culture:Positive",
    per503 = "Equality: Positive",
    per504 = "Welfare State Expansion",
    per505 = "Welfare State Limitation",
    per506 = "Education Expansion",
    per507 = "Education Limitation",
    ## fabric of society
    per601 = "Patriotism/Nationalism:Positive",
    per602 = "Patriotism/Nationalism:Negative",
    per603 = "Traditional Morality:Positive",
    per604 = "Traditional Morality:Negative",
    per605 = "Law and Order: Positive",
    per606 = "Civic Mindedness: Positive",
    per607 = "Multiculturalism:Positive",
    per608 = "Multiculturalism:Negative",
    ## social groups
    per701 = "Labour Groups: Positive",
    per702 = "Labour Groups: Negative",
    per703 = "Farmers: Positive",
    per704 = "Middle Class: Positive",
    per705 = "Minorities:Positive",
    per706 = "Other:Positive"
  )
)


## Each of the four tables below has one row per year and party and
## holds the summed share of quasi-sentences over a fixed set of
## issue codes. `.groups = "drop"` returns plain ungrouped tables:
## without it, summarize() leaves the result grouped by year and
## prints a grouping message for every call.

## free-market side of the economic index: issue areas that,
## following Gethin et al., reflect free-market ideology
econ_distributive_free_market <- manifestos_long %>%
  filter(characteristic %in%
           c("per401", "per402", "per407", "per505", "per507",
             "per410", "per414",
             "per702")) %>%
  group_by(year, partyname) %>%
  summarize(share_econ_free_market = sum(share),
            .groups = "drop")

## redistribution side of the economic index: issue areas that,
## following Gethin et al., reflect support for liberal (left)
## redistributive economic ideology
econ_distributive_redistribution <- manifestos_long %>%
  filter(characteristic %in%
           c("per403", "per404", "per406",
             "per504", "per506", "per413",
             "per412", "per701", "per405",
             "per409", "per415", "per503")) %>%
  group_by(year, partyname) %>%
  summarize(share_econ_distributive = sum(share),
            .groups = "drop")

## conservative side of the culture index: issue areas identified
## by Gethin et al. as reflecting conservative cultural values
culture_conservative <- manifestos_long %>%
  filter(characteristic %in%
           c("per305", "per601", "per603",
             "per605", "per608")) %>%
  group_by(year, partyname) %>%
  summarize(share_culture_conservative = sum(share),
            .groups = "drop")

## progressive side of the culture index: issue areas identified
## by Gethin et al. as reflecting progressive cultural values
culture_progressive <- manifestos_long %>%
  filter(characteristic %in%
           c("per501", "per602", "per604",
             "per502", "per607", "per416",
             "per705", "per706", "per201",
             "per202")) %>%
  group_by(year, partyname) %>%
  summarize(share_culture_progressive = sum(share),
            .groups = "drop")


## joining indices: one row per year and party holding all four
## component sums
overall <- econ_distributive_free_market %>%
  left_join(econ_distributive_redistribution,
            by = c("year", "partyname")) %>%
  left_join(culture_conservative,
            by = c("year", "partyname")) %>%
  left_join(culture_progressive,
            by = c("year", "partyname"))

## create each dimension by subtracting the right-leaning sum
## from the left-leaning sum, so larger values mean a more
## liberal platform
overall$econ_distributive_dimension <- overall$share_econ_distributive -
  overall$share_econ_free_market

overall$cultural <- overall$share_culture_progressive -
  overall$share_culture_conservative

## create long version: one row per year, party and dimension.
## pivot_longer() is used in place of the superseded gather().
overall <- overall %>%
  select(year, partyname,
         cultural,
         econ_distributive_dimension) %>%
  pivot_longer(cols = c(cultural, econ_distributive_dimension),
               names_to = "dimension",
               values_to = "share")

## recode dimension labels (these become the facet titles)
overall$dimension <- dplyr::recode(
  overall$dimension,
  `cultural` = "Sociocultural Index",
  `econ_distributive_dimension` = "Economic-Distributive Index"
)

## Figure: both indices over time, one panel per index, one colour
## per party. Points are the individual platforms; the line is a
## loess smooth drawn without a confidence band.
ggplot(overall,
       aes(x = year, y = share,
           color = partyname, group = partyname)) +
  facet_wrap(~ dimension) +
  geom_point(alpha = .4) +
  geom_smooth(method = "loess", se = FALSE) +
  labs(y = "Party Platform Share:\nMore Conservative ---- More Liberal",
       x = NULL,
       color = NULL) +
  ## theme_bw() is a complete theme, so the legend position has to
  ## be set after it
  theme_bw() +
  theme(legend.position = "bottom")
## output: figures/manifestos_index, 5x7
## (no ggsave() call here -- presumably exported by hand)


## look at individual attention to specific redistributive concerns
## per504: welfare state expansion
## per505 welfare state limitation
## per409: keynesian demand management

## same cleaning as for the long table, applied to the wide table:
## year becomes a Date on 1 January of that year
manifestos$year <- as.Date(paste0(manifestos$year, "-01-01"))

## partyname becomes a factor with "Republican Party" first
manifestos$partyname <- factor(
  manifestos$partyname,
  levels = c("Republican Party", "Democratic Party")
)


## net attention to the welfare state for each platform:
## per504 (welfare state expansion) minus per505 (welfare state
## limitation). This is a row-wise difference, not an aggregate,
## so it is computed with mutate() rather than summarize().
welfare <- manifestos %>%
  mutate(welfare = per504 - per505) %>%
  select(partyname, year, welfare)

## year is mapped as a Date so the x axis shows calendar years;
## wrapping it in as.numeric() would plot days since 1970-01-01
ggplot(welfare,
       mapping = aes(x = year,
                     y = welfare,
                     color = partyname)) +
  geom_point() +
  geom_line() +
  theme(legend.position = "bottom") +
  labs(x = NULL,
       color = NULL,
       y = "Percent Mentions of Welfare State Expansion -\nPercent Mentions of Welfare State Limitation")


## attention to Keynesian demand management (per409) over time,
## by party. The y-axis label names per409; it previously repeated
## the welfare-state label from the plot above.
ggplot(manifestos,
       mapping = aes(x = year,
                     y = per409,
                     color = partyname)) +
  geom_point() +
  geom_line() +
  theme(legend.position = "bottom") +
  labs(x = NULL,
       color = NULL,
       y = "Percent Mentions of Keynesian Demand Management")


